package git.soulbgm.mapreduce.ETL;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.regex.Pattern;

/**
 * Mapper that filters log lines by token count: an input line is written to
 * the output unchanged (as the key, paired with a {@link NullWritable} value)
 * when it splits into more than 11 whitespace-separated tokens; any other
 * line is dropped without producing output.
 *
 * @author SoulBGM
 * @version 1.0
 * @date 2021/12/4 21:09
 */
public class WebLogMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

    /**
     * Token delimiter (one or more whitespace characters). Compiled once so the
     * regex is not rebuilt for every call to {@link #parseLog(String)}.
     */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Smallest token count a line must have to be kept, i.e. more than 11 tokens. */
    private static final int MIN_FIELD_COUNT = 12;

    /**
     * Emits the input line as-is when it passes {@link #parseLog(String)};
     * otherwise emits nothing for this record.
     *
     * @param key     input key supplied by the framework; not used here
     * @param value   the raw input line
     * @param context sink for the accepted line
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        if (parseLog(value.toString())) {
            // The original Text is written, so accepted lines pass through unmodified.
            context.write(value, NullWritable.get());
        }
    }

    /**
     * Decides whether a line has enough whitespace-separated tokens to be kept.
     *
     * <p>Tokens are counted with the same rules as {@code String.split("\\s+")}:
     * trailing empty tokens are discarded, while a line that begins with
     * whitespace contributes one empty leading token to the count.
     *
     * @param line the raw input line
     * @return {@code true} if the line yields at least {@link #MIN_FIELD_COUNT}
     *         tokens, {@code false} otherwise
     */
    private boolean parseLog(String line) {
        return WHITESPACE.split(line).length >= MIN_FIELD_COUNT;
    }
}
